from bs4 import BeautifulSoup
import sys
sys.path.append('../kernel')
sys.path.append('../model')
import spider_main
import phone_model
import time
class City_hotelController(object):
    """Crawl a listing page and store the links found on it.

    NOTE(review): the class name mentions hotels, but the visible code
    persists through ``phone_model.Phone_m`` and the method is named
    ``get_phone_info`` -- the name looks like a leftover; confirm before
    renaming, since callers may depend on it.
    """

    def __init__(self):
        # Instantiate the core spider object; its downloader fetches pages.
        self.spider = spider_main.SpiderMain()

    def get_phone_info(self, _url, count):
        """Download ``_url`` and store every new ``pul-title`` link on it.

        Each ``<a class="pul-title">`` anchor contributes its ``href`` and
        ``title``. Links already known to the model are skipped; new ones
        are stored and their title is printed.

        Args:
            _url: Address of the listing page to download.
            count: Page number, passed through unchanged to
                ``Phone_m.add_city_hotel_url`` for every stored link.
        """
        p_model = phone_model.Phone_m()
        html_cont = self.spider.downloader.downloader(_url)
        if not html_cont:
            # Nothing to parse. Presumably the downloader returns None or an
            # empty body on failure -- TODO confirm. Skipping the page keeps
            # one bad download from aborting the whole crawl.
            return

        html_obj = BeautifulSoup(html_cont, 'html.parser', from_encoding='gb18030')
        for a_tag in html_obj.find_all('a', class_='pul-title'):
            # Tag.get returns None for a missing attribute, whereas
            # a_tag['href'] would raise KeyError and stop the crawl.
            xcurl = a_tag.get('href')
            xtitle = a_tag.get('title')
            if xcurl is None or xtitle is None:
                continue

            # De-duplicate: skip links that are already stored.
            if p_model.get_xcurl_by_url(xcurl):
                continue

            p_model.add_city_hotel_url(xcurl, xtitle, count)
            print(xtitle)



if __name__ == '__main__':
    # Crawl listing pages 101 through 110 (inclusive), one page at a time.
    obj = City_hotelController()
    _url = 'http://product.cnmo.com/all/product_t2_p'
    first_page = 101
    last_page = 110
    for count in range(first_page, last_page + 1):
        url = _url + str(count) + '.html'
        obj.get_phone_info(url, count)
        print('-------------------------------已经爬取到第 %d 页-------------------------------' % count)
        if count < last_page:
            # Pause 3 seconds between requests; no pause after the last page.
            time.sleep(3)
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # site module and is missing under `python -S` or in frozen builds.
    sys.exit()